## Perspective Taking and Security Dilemma Thinking: replication data
#Kertzer, Brutger and Quek
#October 1, 2023

####### SCS Replication 4.R: this R file contains the code necessary to run the perspective taking experiment analysis in the supplementary appendix - run SCS Replication 1.R first!
# All of the following analyses were carried out using R version 4.3.1 GUI 1.77 (Big Sur ARM build 8238) on an M1 Pro Macbook Pro running MacOS Ventura. 

##### Appendix 1: Note on survey partner

## Table 1: Sample characteristics: Chinese sample

china1$age_0 <- recode(china1$age, binarize=TRUE, x=18,x2=19)
china1$age_1 <- recode(china1$age, binarize=TRUE, x=20,x2=29)
china1$age_2 <- recode(china1$age, binarize=TRUE, x=30,x2=39)
china1$age_3 <- recode(china1$age, binarize=TRUE, x=40,x2=49)
china1$age_4 <- recode(china1$age, binarize=TRUE, x=50,x2=100)

fullpop <- c(51.2,12.7,8.8,30.2,27.0,14.2,7.2,24.1,17.1,16.1,16.3,25.3,91.6)
adpop <- c(50.52, NA,NA,NA,NA,NA,NA,4.0,21.7,20.4,21.9,32.0,92.3)
sampledesc <- round(100*c(prop.table(table(china1$male))[2], prop.table(table(china1$region))[c(4,3,2,1,6,5)], prop.table(table(china1$age_0))[2], prop.table(table(china1$age_1))[2], prop.table(table(china1$age_2))[2], prop.table(table(china1$age_3))[2], prop.table(table(china1$age_4))[2], prop.table(table(china1$Han))[2]), digits=1)
tab1 <- data.frame(cbind(fullpop,adpop,sampledesc))
rownames(tab1)[c(1,8:13)] <- c("Male", "<=19","20-29","30-39","40-49",">=50","Han")

stargazer(tab1, title="Sample characteristics: Chinese sample",summary=FALSE, digits=1)

## Table 2: Sample characteristics: US sample

usa1$age_0 <- recode(usa1$age, binarize=TRUE, x=18,x2=24)
usa1$age_1 <- recode(usa1$age, binarize=TRUE, x=25,x2=44)
usa1$age_2 <- recode(usa1$age, binarize=TRUE, x=45,x2=64)
usa1$age_3 <- recode(usa1$age, binarize=TRUE, x=65,x2=100)

sampledesc <- round(c(prop.table(table(usa1$male))[c(2,1)], prop.table(table(usa1$age_0))[2], prop.table(table(usa1$age_1))[2], prop.table(table(usa1$age_2))[2], prop.table(table(usa1$age_3))[2],prop.table(table(usa1$educ0))[2], prop.table(table(usa1$educ1))[2], prop.table(table(usa1$educ2))[2], prop.table(table(usa1$educ3))[2]), digits=2)
target <- c(0.49,0.51,0.13,0.34,0.34,0.19,0.42,0.19,0.28,0.10)
tab2 <- data.frame(cbind(sampledesc,target))
rownames(tab2) <- c("Male", "Female", "18-24", "25-44", "45-64",">=65", "High school or less", "Some college/university", "College/university", "Graduate/professional school")

stargazer(tab2, title="Sample characteristics: US sample",summary=FALSE, digits=2)

## Figure 1: Young, urban, and educated Chinese overrepresented in Chinese sample

library(readxl)
library(tidyverse)
library(magrittr)

census <- read_xlsx("census.xlsx")
census$sample <- "Census (2010)"
china1$sample <- "Sample"

## Age ##
figa1a <- ggplot() + geom_histogram(data=census, aes(x=age, weight=total, after_stat(density), fill=sample), alpha=0.8) + geom_histogram(data=china1, aes(x=age, after_stat(density), fill=sample), alpha=0.8) + theme_bw() + labs(y="Density", x="Age") + theme(legend.position="none") + scale_fill_brewer(palette="Set1")
ggsave(file="figa1a.pdf", plot=figa1a, height=5,width=5, units="in")

## Gender ##
census.gender <- census %>% gather("gender", "num", c("male", "female")) 
sample.gender <- china1 %>% group_by(sex) %>% summarise(count = n()) %>% mutate(
  gender = case_when(
    sex == 1 ~ "Male",
    sex == 2 ~ "Female"
  )
) %>% na.omit %>% mutate(prop=count/sum(count))

sum.tab <- census.gender %>% group_by(gender) %>% summarise(count = sum(num, weight=total)) %>% mutate(prop=count/sum(count))
sum.tab$sample <- "Census (2010)"
sample.gender$sample <- "Sample"
sum.tab$gender[which(sum.tab$gender=="female")] <- "Female"
sum.tab$gender[which(sum.tab$gender=="male")] <- "Male"

figa1b <- ggplot() + geom_bar(data=sum.tab, aes(x=gender, y=prop, fill=sample), alpha=0.8, stat="identity") + geom_bar(data=sample.gender, aes(x=gender, y=prop, fill=sample), alpha=0.8, stat="identity") + theme_bw() + labs(y="Proportion", x="Gender") + theme(legend.position="none") + scale_fill_brewer(palette="Set1")
ggsave(file="figa1b.pdf", plot=figa1b, height=5,width=5, units="in")

## Region ##
census.region <- read_xlsx("census.xlsx", sheet = 'region')

census.region %<>% mutate(
    region = province_code,
    region_code = case_when(
      region %in% c(3, 30, 11, 28, 17) ~ "N",
      region %in% c(21, 20, 12) ~ "NE",
      region %in% c(27, 18, 34, 2, 5, 19, 26) ~ "E",
      region %in% c(13, 15, 16, 7, 8, 10) ~ "CS",
      region %in% c(4, 29, 9, 33, 31) ~ "SW",
      region %in% c(25, 6, 24, 23, 32) ~ "NW"
    ) %>% factor(levels = c("N", "NE", "E", "CS", "SW", "NW"))
  ) 

cen.sum.tab <- census.region %>% group_by(region_code) %>% dplyr::summarise(fre = sum(total)) %>% na.omit %>% mutate(prop=fre/sum(fre))
cen.sum.tab$sample="Census (2010)"

china1 %<>% mutate(
    region_code = case_when(
      region %in% c(3, 30, 11, 28, 17) ~ "N",
      region %in% c(21, 20, 12) ~ "NE",
      region %in% c(27, 18, 34, 2, 5, 19, 26) ~ "E",
      region %in% c(13, 15, 16, 7, 8, 10) ~ "CS",
      region %in% c(4, 29, 9, 33, 31) ~ "SW",
      region %in% c(25, 6, 24, 23, 32) ~ "NW"
    ) %>% factor(levels = c("N", "NE", "E", "CS", "SW", "NW"))
  )

china1 %<>% mutate(region2=case_when(region=="Central-Southern" ~ "CS", region=="Eastern" ~ "E", region=="Northeastern" ~ "NE", region=="Northern" ~ "N", region=="Northwestern" ~ "NW", region=="Southwestern" ~ "SW"))

sam.sum.tab <- china1 %>% group_by(region2) %>% dplyr::summarise(fre = n()) %>% na.omit %>% mutate(prop=fre/sum(fre))
sam.sum.tab$sample="Sample"

figa1c <- ggplot() + geom_bar(data=cen.sum.tab, aes(x=region_code, y=prop, fill=sample), alpha=0.8, stat="identity") + geom_bar(data=sam.sum.tab, aes(x=region2, y=prop, fill=sample), alpha=0.8, stat="identity") + theme_bw() + labs(y="Proportion", x="Region") + theme(legend.position="none") + scale_fill_brewer(palette="Set1")
ggsave(file="figa1c.pdf", plot=figa1c, height=5,width=5, units="in")

## Education ##
china1 %<>% mutate(
  edu = case_when(
    edu2 != 2 ~ max(1, edu1 - 1),
    edu2 == 2 ~ edu1,
    TRUE ~ NA_real_
  ),
  edu_plot = case_when(
    edu == 1 ~ "Never went\nto school",
    edu == 2 ~ "Primary",
    edu == 3 ~ "Middle\nschool",
    edu == 4 ~ "High\nschool",
    edu == 5 ~ "Vocational\nschool",
    edu == 6 ~ "Undergrad",
    edu %in% 7:8 ~ "Postgrad"
	) %>% factor(levels = c("Never went\nto school", "Primary", "Middle\nschool", "High\nschool", "Vocational\nschool", "Undergrad", "Postgrad")) 
)

census.edu <- read_xlsx("census.xlsx", sheet = 'edu')

census.edu %<>% gather("level", "count", 2:8) %>% group_by(level) %>% summarise(tot = sum(count)) %>% mutate(
  level = level %>% gsub("\\.", " ", x = .) %>% factor(levels = c("Never went to school", "Primary", "Middle school", "High school", "Vocational school", "Undergrad", "Postgrad")) 
) %>% mutate(prop=tot/sum(tot))

levels(census.edu$level) <- c("Never went\nto school", "Primary", "Middle\nschool", "High\nschool", "Vocational\nschool", "Undergrad", "Postgrad")

sam.edu <- china1 %>% group_by(edu_plot) %>% summarise(tot = n()) %>% na.omit %>% mutate(prop=tot/sum(tot))
sam.edu$level <- sam.edu$edu_plot

census.edu$sample="Census (2010)"
sam.edu$sample="Sample"

figa1d <- ggplot() + geom_bar(data=census.edu, aes(x=level, y=prop, fill=sample), alpha=0.8, stat="identity") + geom_bar(data=sam.edu, aes(x=level, y=prop, fill=sample), alpha=0.8, stat="identity") + theme_bw() + labs(y="Proportion", x="Education level") + theme(legend.position="none", axis.text.x=element_text(size=9)) + scale_fill_brewer(palette="Set1")
ggsave(file="figa1d.pdf", plot=figa1d, height=5,width=5, units="in")

## Ethnicity ##
census.ethnic <- read_xlsx("census.xlsx", sheet = 'ethnic')
sum.tab.cen <- census.ethnic %>% group_by(ethnic) %>% summarise(fre = sum(total)) %>% mutate(prop=fre/sum(fre))

sum.tab.sam <- china1 %>% filter(!is.na(ethnic)) %>% group_by(ethnic) %>% summarise(fre = n()) %>% mutate(prop=fre/sum(fre))

sum.tab.cen$ethnic2 <- sum.tab.sam$ethnic2 <- c("Han","Zhuang","Man","Hui","Miao","Uyghurs", "Tibetans","Others")

sum.tab.cen$sample="Census (2010)"
sum.tab.sam$sample="Sample"

figa1e <- ggplot() + geom_bar(data=sum.tab.cen, aes(x=ethnic2, y=prop, fill=sample), alpha=0.8, stat="identity") + geom_bar(data=sum.tab.sam, aes(x=ethnic2, y=prop, fill=sample), alpha=0.8, stat="identity") + theme_bw() + labs(y="Proportion", x="Ethnicity") + theme(legend.position="none", axis.text.x=element_text(size=9)) + scale_fill_brewer(palette="Set1") 
ggsave(file="figa1e.pdf", plot=figa1e, height=5,width=5, units="in")


## Sector ##
census.jobs <- read_xlsx("census.xlsx", sheet = 'job')

sum.tab.cen <- census.jobs %>% filter(id %in% c(1,2,4,5,6)) %>% group_by(id) %>% summarise(fre = sum(count)) %>% mutate(prop=fre/sum(fre))

sum.tab.sam <- china1 %>% filter(job %in% c(1,2,4,5,6)) %>% group_by(job) %>% summarise(fre = n()) %>% mutate(prop=fre/sum(fre))

sum.tab.cen$jobs2 <- sum.tab.sam$jobs2 <- c("Agriculture", "Commercial", "Labourer","Social\norganizations","Party and\nstate organs")
sum.tab.cen$sample="Census (2010)"
sum.tab.sam$sample="Sample"

figa1f <- ggplot() + geom_bar(data=sum.tab.cen, aes(x=jobs2, y=prop, fill=sample), alpha=0.8, stat="identity") + geom_bar(data=sum.tab.sam, aes(x=jobs2, y=prop, fill=sample), alpha=0.8, stat="identity") + theme_bw() + labs(y="Proportion", x="Sector") + theme(legend.position="none", axis.text.x=element_text(size=9)) + scale_fill_brewer(palette="Set1") 
ggsave(file="figa1f.pdf", plot=figa1f, height=5,width=5, units="in")


## Marital status ##

census.marital = read_xlsx("census.xlsx", sheet = 'marital')

sum.tab.cen <- census.marital %>% group_by(marital) %>% summarise(fre = sum(count)) %>% mutate(prop=fre/sum(fre))

sum.tab.sam <- china1 %>% filter(!is.na(marital)) %>% group_by(marital) %>% summarise(fre = n()) %>% mutate(prop=fre/sum(fre))

sum.tab.cen$marital2 <- sum.tab.sam$marital2 <- c("Single", "Married","Divorced","Widowed")

sum.tab.cen$sample="Census (2010)"
sum.tab.sam$sample="Sample"

figa1g <- ggplot() + geom_bar(data=sum.tab.cen, aes(x=marital2, y=prop, fill=sample), alpha=0.8, stat="identity") + geom_bar(data=sum.tab.sam, aes(x=marital2, y=prop, fill=sample), alpha=0.8, stat="identity") + theme_bw() + labs(y="Proportion", x="Marital status") + theme(legend.position="none") + scale_fill_brewer(palette="Set1") 
ggsave(file="figa1g.pdf", plot=figa1g, height=5,width=5, units="in")


## Urban vs rural

census.urban = read_xlsx("census.xlsx", sheet = 'urb')

sum.tab.cen <- census.urban %>% group_by(urban) %>% summarise(fre = sum(count)) %>%  mutate(prop=fre/sum(fre))

sum.tab.sam <- china1 %>% filter(!is.na(urban)) %>% group_by(urban) %>% summarise(fre = n()) %>%  mutate(prop=fre/sum(fre))

sum.tab.cen$sample="Census (2010)"
sum.tab.sam$sample="Sample"

sum.tab.cen$urban2 <- sum.tab.sam$urban2 <- c("Urban", "Rural")

figa1h <- ggplot() + geom_bar(data=sum.tab.cen, aes(x=urban2, y=prop, fill=sample), alpha=0.8, stat="identity") + geom_bar(data=sum.tab.sam, aes(x=urban2, y=prop, fill=sample), alpha=0.8, stat="identity") + theme_bw() + labs(y="Proportion", x="Sector") + theme(legend.position="none") + scale_fill_brewer(palette="Set1") 
ggsave(file="figa1h.pdf", plot=figa1h, height=5,width=5, units="in")


## Figure legend
df_comb <- bind_rows(china1, census)
temp <- ggplot(data=df_comb, aes(x=age, fill=sample, weight=total)) + geom_histogram() + scale_fill_brewer(palette="Set1") + labs(fill="Sample")
legendary <- cowplot::get_legend(temp)
figa1i <- grid::grid.draw(legendary)
ggsave(file="figa1i.pdf", plot=figa1i, height=5,width=5, units="in")

###### Appendix 4: Supplementary analysis

### Causal average complier effects
dat.dec <- subset(usa1, usa1$ch_incr==0)

usa1$attribution2[which(is.na(usa1$attribution2))] <- ""
l <- nchar(usa1$attribution2) #Define compliance based on the length of the text
q <- quantile(l[which(usa1$PT==1 & !is.na(usa1$policyChoice))], seq(0,0.5,by=0.05)) #Identifying quantiles
l.dec <- subset(l, usa1$ch_incr==0) #Subset of data just pertaining to decrease

out.mat <- matrix(NA, nrow=length(q), ncol=5) #q, ITT/%Compliers, B, Low CI, High CI
out.mat[,1] <- seq(0,50,by=5)
for (i in 1:length(q)){
	dat.dec$compl1 <- ifelse(l.dec >= q[i] & dat.dec$PT == 1,1,ifelse(l.dec < q[i] & dat.dec$PT==1,0,NA))
	p.compl1 <- (table(dat.dec$compl1)/sum(table(dat.dec$compl1)))[length(table(dat.dec$compl1))] #% compliers
	out.mat[i,2] <- (mean(dat.dec$policyChoice[which(dat.dec$PT==1)], na.rm=TRUE) -mean(dat.dec$policyChoice[which(dat.dec$PT==0)], na.rm=TRUE))/p.compl1	
	
	dat.dec$compl3 <- ifelse(dat.dec$PT==0,0,ifelse(l.dec >= q[i] & dat.dec$PT == 1,1,0))
	
	mod.iv <- ivreg(policyChoice ~ PT | compl3, data=dat.dec)
	out.mat[i,3] <- mod.iv$coef[2]
	se <- sqrt(vcov(mod.iv)[2,2])
	n <- mod.iv$n
	err <- qt(0.975, df=mod.iv$df.residual)
	out.mat[i,4] <- out.mat[i,3] - err*se
	out.mat[i,5] <- out.mat[i,3] + err*se
}

dat.inc <- subset(usa1, usa1$ch_incr==1)
l.inc <- subset(l, usa1$ch_incr==1) #Subset of data just pertaining to increasing

out.mat2 <- matrix(NA, nrow=length(q), ncol=4) #ITT/%Compliers, B, Low CI, High CI
for (i in 1:length(q)){
	dat.inc$compl1 <- ifelse(l.inc >= q[i] & dat.inc$PT == 1,1,ifelse(l.inc < q[i] & dat.inc$PT==1,0,NA))
	p.compl1 <- (table(dat.inc$compl1)/sum(table(dat.inc$compl1)))[length(table(dat.inc$compl1))] #% compliers
	out.mat2[i,1] <- (mean(dat.inc$policyChoice[which(dat.inc$PT==1)], na.rm=TRUE) -mean(dat.inc$policyChoice[which(dat.inc$PT==0)], na.rm=TRUE))/p.compl1	
	
	dat.inc$compl3 <- ifelse(dat.inc$PT==0,0,ifelse(l.inc >= q[i] & dat.inc$PT == 1,1,0))

	mod.iv <- ivreg(policyChoice ~ PT | compl3, data=dat.inc)
	out.mat2[i,2] <- mod.iv$coef[2]
	se <- sqrt(vcov(mod.iv)[2,2])
	n <- mod.iv$n
	err <- qt(0.975, df=mod.iv$df.residual)
	out.mat2[i,3] <- out.mat2[i,2] - err*se
	out.mat2[i,4] <- out.mat2[i,2] + err*se
}

round(out.mat2, 3)

out.mat3 <- cbind(round(out.mat,digits=2), round(out.mat2,digits=2))

#Bootstrapping CIs for ratio estimator

set.seed(43215)
B <- 1500

dat.dec$l.dec <- l.dec
out.boot <- matrix(NA, nrow=B, ncol=length(q)) 
for (i in seq_len(B)){
	k <- sample(1:nrow(dat.dec), nrow(dat.dec), replace=TRUE)
	temp <- dat.dec[k,]
	for (j in 1:length(q)){
		temp$compl1 <- ifelse(temp$l.dec >= q[j] & temp$PT==1,1,ifelse(temp$l.dec < q[j] & temp$PT==1,0,NA)) 
		p.compl1 <- (table(temp$compl1)/sum(table(temp$compl1)))[length(table(temp$compl1))] #% compliers
		out.boot[i,j] <- (mean(temp$policyChoice[which(temp$PT==1)], na.rm=TRUE) - mean(temp$policyChoice[which(temp$PT==0)], na.rm=TRUE))/p.compl1
		}
	}

out.boot2 <- round(cbind(apply(out.boot,2,mean,na.rm=TRUE), apply(out.boot,2,quantile,0.025, na.rm=TRUE), apply(out.boot,2,quantile,0.975,na.rm=TRUE)), digits=2)

##### And for ch_incr=1

dat.inc$l.inc <- l.inc
out.boot3 <- matrix(NA, nrow=B, ncol=length(q)) 
for (i in seq_len(B)){
	k <- sample(1:nrow(dat.inc), nrow(dat.inc), replace=TRUE)
	temp <- dat.inc[k,]
	for (j in 1:length(q)){
		temp$compl1 <- ifelse(temp$l.inc >= q[j] & temp$PT==1,1,ifelse(temp$l.inc < q[j] & temp$PT==1,0,NA)) 
		p.compl1 <- (table(temp$compl1)/sum(table(temp$compl1)))[length(table(temp$compl1))] #% compliers
		out.boot3[i,j] <- (mean(temp$policyChoice[which(temp$PT==1)], na.rm=TRUE) - mean(temp$policyChoice[which(temp$PT==0)], na.rm=TRUE))/p.compl1
		}
	}
		
out.boot4 <- round(cbind(apply(out.boot3,2,mean,na.rm=TRUE), apply(out.boot3,2,quantile,0.025, na.rm=TRUE), apply(out.boot3,2,quantile,0.975,na.rm=TRUE)), digits=2)	

#Table 3: Causal average complier effect (CACE) estimates, US sample
paste(out.mat3[,1], out.mat3[,2], paste("(",out.boot2[,2], ", ", out.boot2[,3],")", sep=""), out.mat3[,3],paste("(",out.mat3[,4],", ", out.mat3[,5],")",sep=""), out.mat3[,6], paste("(",out.boot4[,2], ", ", out.boot4[,3],")", sep=""), out.mat3[,7], paste("(", out.mat3[,8],", ", out.mat3[,9], ") \\",sep=""), sep="&") #For each panel, the second and third columns are ratio estimator with 95% CIs; the fourth and fifth are 2SLS estimates with 95% CIs


### Chinese data
Sys.setlocale(category = "LC_CTYPE", locale = "zh_CN.utf-8")
#Produce slimmer dataframe
cn.dat <- china1[c("policyChoice", "am_incr", "PT", "attribution2")]

#Preprocess using jieba
require(jiebaR)
cn.dat$seg_attrib <- segment(cn.dat$attribution2, worker(bylines = TRUE))
cn.dat$seg_attrib <- gsub("^c\\(|\\)$", "", cn.dat$seg_attrib) # clear 'c(' in the beginning and ')' in the end of the string
cn.dat$seg_attrib <- gsub(", ", " ", cn.dat$seg_attrib) # remove ,
cn.dat$seg_attrib <- gsub('"', '', cn.dat$seg_attrib) # remove "

#Address missingness and observations that use English letters but Chinese text

#Drop final two observations (they're missing, and produce problems)
cn.dat <- cn.dat[!is.na(cn.dat$am_incr),]

#Drop observations that use random English letters/phrases
j <- c(76,104,153,219,317,319,449,549,605,651,703,781,833,865,962,1031,1181,1190,1208,1212)

cn.dat$seg_attrib[j] <- ""

cn.dat$l <- nchar(cn.dat$seg_attrib)

q <- quantile(cn.dat$l[which(cn.dat$PT==1 & !is.na(cn.dat$policyChoice))], seq(0,0.5,by=0.05)) #Identifying quantiles

#Subset data on each treatment
df.dec <- subset(cn.dat, cn.dat$am_incr==0)
df.inc <- subset(cn.dat, cn.dat$am_incr==1)

#US decreases

out.mat <- matrix(NA, nrow=length(q), ncol=5) #q, ITT/%Compliers, B, Low CI, High CI
out.mat[,1] <- seq(0,50,by=5)
for (i in 1:length(q)){
	df.dec$compl1 <- ifelse(df.dec$l >= q[i] & df.dec$PT == 1,1,ifelse(df.dec$l < q[i] & df.dec$PT==1,0,NA))
	p.compl1 <- (table(df.dec$compl1)/sum(table(df.dec$compl1)))[length(table(df.dec$compl1))] #% compliers
	out.mat[i,2] <- (mean(df.dec$policyChoice[which(df.dec$PT==1)], na.rm=TRUE) -mean(df.dec$policyChoice[which(df.dec$PT==0)], na.rm=TRUE))/p.compl1	

	df.dec$compl3 <- ifelse(df.dec$PT==0,0,ifelse(df.dec$l >= q[i] & df.dec$PT == 1,1,0))
	
	mod.iv <- ivreg(policyChoice ~ PT | compl3, data=df.dec)
	out.mat[i,3] <- mod.iv$coef[2]
	se <- sqrt(vcov(mod.iv)[2,2])
	n <- mod.iv$n
	err <- qt(0.975, df=mod.iv$df.residual)
	out.mat[i,4] <- out.mat[i,3] - err*se
	out.mat[i,5] <- out.mat[i,3] + err*se
}

#US increases

out.mat2 <- matrix(NA, nrow=length(q), ncol=5) #q, ITT/%Compliers, B, Low CI, High CI
out.mat2[,1] <- seq(0,50,by=5)
for (i in 1:length(q)){
	df.inc$compl1 <- ifelse(df.inc$l >= q[i] & df.inc$PT == 1,1,ifelse(df.inc$l < q[i] & df.inc$PT==1,0,NA))
	p.compl1 <- (table(df.inc$compl1)/sum(table(df.inc$compl1)))[length(table(df.inc$compl1))] #% compliers
	out.mat2[i,2] <- (mean(df.inc$policyChoice[which(df.inc$PT==1)], na.rm=TRUE) -mean(df.inc$policyChoice[which(df.inc$PT==0)], na.rm=TRUE))/p.compl1	

	df.inc$compl3 <- ifelse(df.inc$PT==0,0,ifelse(df.inc$l >= q[i] & df.inc$PT == 1,1,0))
	
	mod.iv <- ivreg(policyChoice ~ PT | compl3, data=df.inc)
	out.mat2[i,3] <- mod.iv$coef[2]
	se <- sqrt(vcov(mod.iv)[2,2])
	n <- mod.iv$n
	err <- qt(0.975, df=mod.iv$df.residual)
	out.mat2[i,4] <- out.mat2[i,3] - err*se
	out.mat2[i,5] <- out.mat2[i,3] + err*se
}

round(out.mat2, 3)

out.mat3 <- cbind(round(out.mat,digits=2), round(out.mat2,digits=2))

#####Bootstrapped CIs for ratio estimator

set.seed(43215)
B <- 1500

out.boot <- matrix(NA, nrow=B, ncol=length(q)) 
for (i in seq_len(B)){
	k <- sample(1:nrow(df.dec), nrow(df.dec), replace=TRUE)
	temp <- df.dec[k,]
	for (j in 1:length(q)){
		temp$compl1 <- ifelse(temp$l >= q[j] & temp$PT==1,1,ifelse(temp$l < q[j] & temp$PT==1,0,NA)) 
		p.compl1 <- (table(temp$compl1)/sum(table(temp$compl1)))[length(table(temp$compl1))] #% compliers
		out.boot[i,j] <- (mean(temp$policyChoice[which(temp$PT==1)], na.rm=TRUE) - mean(temp$policyChoice[which(temp$PT==0)], na.rm=TRUE))/p.compl1
		}
	}

out.boot2 <- round(cbind(apply(out.boot,2,mean,na.rm=TRUE), apply(out.boot,2,quantile,0.025, na.rm=TRUE), apply(out.boot,2,quantile,0.975,na.rm=TRUE)), digits=2)

##### And for am_incr=1

out.boot3 <- matrix(NA, nrow=B, ncol=length(q)) 
for (i in seq_len(B)){
	k <- sample(1:nrow(df.inc), nrow(df.inc), replace=TRUE)
	temp <- df.inc[k,]
	for (j in 1:length(q)){
		temp$compl1 <- ifelse(temp$l >= q[j] & temp$PT==1,1,ifelse(temp$l < q[j] & temp$PT==1,0,NA)) 
		p.compl1 <- (table(temp$compl1)/sum(table(temp$compl1)))[length(table(temp$compl1))] #% compliers
		out.boot3[i,j] <- (mean(temp$policyChoice[which(temp$PT==1)], na.rm=TRUE) - mean(temp$policyChoice[which(temp$PT==0)], na.rm=TRUE))/p.compl1
		}
	}
		
out.boot4 <- round(cbind(apply(out.boot3,2,mean,na.rm=TRUE), apply(out.boot3,2,quantile,0.025, na.rm=TRUE), apply(out.boot3,2,quantile,0.975,na.rm=TRUE)), digits=2)	

#### Table 4: Causal average complier effect (CACE) estimates, Chinese sample

paste(out.mat3[,1], out.mat3[,2], paste("(",out.boot2[,2], ", ", out.boot2[,3],")", sep=""), out.mat3[,3],paste("(",out.mat3[,4],", ", out.mat3[,5],")",sep=""), out.mat3[,7], paste("(",out.boot4[,2], ", ", out.boot4[,3],")", sep=""), out.mat3[,8], paste("(", out.mat3[,9],", ", out.mat3[,10], ") \\",sep=""), sep="&") #For each panel, the second and third columns are ratio estimator with 95% CIs; the fourth and fifth are 2SLS estimates with 95% CIs

######### Balance tests

### Figure 2: Balance tests: China sample

pdf("figa2.pdf", height=11, width=11)
par(mfrow=c(3,3))

#Age
plot(density(china1[china1$PT==0,]$age, na.rm=T), xlab="Age", ylim=c(0,0.032), xlim=c(10,90), main="Age", lty=1, cex=1.2)
par(new=TRUE)
plot(density(china1[china1$PT==1,]$age, na.rm=T), xlab="", ylim=c(0,0.032), xlim=c(10,90), main="", axes=F, ylab="", lty=2)
legend("topleft", legend=c("Treatment 0", "Treatment PT"), lty=c(1,2), cex=1, bty="n")

#Gender
plot(density(china1[china1$PT==0,]$male, na.rm=T), xlab="Gender (1 = Male)", ylim=c(0,2), xaxt='n', main="Gender", lty=1, cex=1.2)
par(new=TRUE)
plot(density(china1[china1$PT==1,]$male, na.rm=T), xlab="", ylim=c(0,2), main="", axes=F, ylab="", lty=2)
legend("topleft", legend=c("Treatment 0", "Treatment PT"), lty=c(1,2), cex=1, bty="n")
xticks <- seq(0, 1, 1)
axis(1, at=xticks)

#Han ethnicity
plot(density(china1[china1$PT==0,]$Han, na.rm=T), xlab="Han (0 = No, 1 = Yes)", ylim=c(0,8), xlim=c(-0.2,1.2), xaxt='n', main="Han ethnicity", lty=1, cex=1.2)
par(new=TRUE)
plot(density(china1[china1$PT==1,]$Han, na.rm=T), xlab="", ylim=c(0,8), xlim=c(-0.2,1.2), main="", axes=F, ylab="", lty=2)
legend("topleft", legend=c("Treatment 0", "Treatment PT"), lty=c(1,2), cex=1, bty="n")
xticks <- seq(0, 1, 1)
axis(1, at=xticks)

#Income
plot(density(china1[china1$PT==0,]$income1, na.rm=T), xlab="Household income", ylim=c(0,2.5), xlim=c(-0.2, 1.2), xaxt='n', main="Household income", lty=1, cex=1.2)
par(new=TRUE)
plot(density(china1[china1$PT==1,]$income1, na.rm=T), xlab="", ylim=c(0,2.5), xlim=c(-0.2, 1.2), main="", axes=F, ylab="", lty=2)
legend("topleft", legend=c("Treatment 0", "Treatment PT"), lty=c(1,2), cex=1, bty="n")
xticks <- seq(0, 1, 0.5)
axis(1, at=xticks)

#Education

china1$university <- recode(china1$educ, binarize=TRUE, x=0.65,x2=1)

plot(density(china1[china1$PT==0,]$university, na.rm=T), xlab="University degree (0 = No, 1 = Yes)", ylim=c(0,2), xaxt='n', main="University degree", lty=1, cex=1.2)
par(new=TRUE)
plot(density(china1[china1$PT==1,]$university, na.rm=T), xlab="", ylim=c(0,2), main="", axes=F, ylab="", lty=2)
legend("topleft", legend=c("Treatment 0", "Treatment PT"), lty=c(1,2), cex=1, bty="n")
xticks <- seq(0, 1, 1)
axis(1, at=xticks)

#Party affiliation
plot(density(china1[china1$PT==0,]$partyMember, na.rm=T), xlab="Communist Party affiliation (0 = No, 1 = Yes)", ylim=c(0,2.8), xaxt='n', main="Party affiliation", lty=1, cex=1.2)
par(new=TRUE)
plot(density(china1[china1$PT==1,]$partyMember, na.rm=T), xlab="", ylim=c(0,2.8), main="", axes=F, ylab="", lty=2)
legend("topright", legend=c("Treatment 0", "Treatment PT"), lty=c(1,2), cex=1, bty="n")
xticks <- seq(0, 1, 1)
axis(1, at=xticks)

#Interest in politics
plot(density(china1[china1$PT==0,]$interest, na.rm=T), xlab="Interest in politics", ylim=c(0,1), xaxt='n', main="Interest in politics", lty=1, cex=1.2)
par(new=TRUE)
plot(density(china1[china1$PT==1,]$interest, na.rm=T), xlab="", ylim=c(0,1), main="", axes=F, ylab="", lty=2)
legend("topleft", legend=c("Treatment 0", "Treatment PT"), lty=c(1,2), cex=1, bty="n")
xticks <- seq(0, 3, 1)
axis(1, at=xticks)

#Marital status
plot(density(china1[china1$PT==0,]$married, na.rm=T), xlab="Married (0 = No, 1 = Yes)", ylim=c(0,3), xaxt='n', main="Marital status", lty=1, cex=1.2)
par(new=TRUE)
plot(density(china1[china1$PT==1,]$married, na.rm=T), xlab="", ylim=c(0,3), main="", axes=F, ylab="", lty=2)
legend("topleft", legend=c("Treatment 0", "Treatment PT"), lty=c(1,2), cex=1, bty="n")
xticks <- seq(0, 1, 1)
axis(1, at=xticks)

#Knowledge
china1$know <- china1$know1*4
plot(density(china1[china1$PT==0,]$know, na.rm=T), xlab="Knowledge", ylim=c(0,1.15), xaxt='n', main="Knowledge", lty=1, cex=1.2)
par(new=TRUE)
plot(density(china1[china1$PT==1,]$know, na.rm=T), xlab="", ylim=c(0,1.15), main="", axes=F, ylab="", lty=2)
legend("topleft", legend=c("Treatment 0", "Treatment PT"), lty=c(1,2), cex=1, bty="n")
xticks <- seq(0, 4, 1)
axis(1, at=xticks)

dev.off()


### Figure 3: Balance tests: US experiment

pdf("figa3.pdf", height=8, width=11)
par(mfrow=c(2,3))

#Age
plot(density(usa1[usa1$PT==0,]$age, na.rm=T), xlab="Age", ylim=c(0,0.032), xlim=c(10,90), main="Age", lty=1, cex=1.2)
par(new=TRUE)
plot(density(usa1[usa1$PT==1,]$age, na.rm=T), xlab="", ylim=c(0,0.032), xlim=c(10,90), main="", axes=F, ylab="", lty=2)
legend("topleft", legend=c("Treatment 0", "Treatment PT"), lty=c(1,2), cex=1, bty="n")

#Gender
plot(density(usa1[usa1$PT==0,]$male, na.rm=T), xlab="Gender (1 = Male)", ylim=c(0,2), xaxt='n', main="Gender", lty=1, cex=1.2)
par(new=TRUE)
plot(density(usa1[usa1$PT==1,]$male, na.rm=T), xlab="", ylim=c(0,2), main="", axes=F, ylab="", lty=2)
legend("topright", legend=c("Treatment 0", "Treatment PT"), lty=c(1,2), cex=1, bty="n")
xticks <- seq(0, 1, 1)
axis(1, at=xticks)

#Party ID
plot(density(usa1[usa1$PT==0,]$pid1, na.rm=T), xlab="Party ID", ylim=c(0,2), xaxt='n', main="Party ID", lty=1, cex=1.2)
par(new=TRUE)
plot(density(usa1[usa1$PT==1,]$pid1, na.rm=T), xlab="", ylim=c(0,2), main="", axes=F, ylab="", lty=2)
legend("topleft", legend=c("Treatment 0", "Treatment PT"), lty=c(1,2), cex=1, bty="n")
xticks <- c(0, 0.5, 1)
axis(1, c("Strong Democrat", "Independent", "Strong Republican"), at=xticks)

#Income
plot(density(usa1[usa1$PT==0,]$income1, na.rm=T), xlab="Household income", ylim=c(0,2.5), xlim=c(-0.2, 1.2), xaxt='n', main="Household income", lty=1, cex=1.2)
par(new=TRUE)
plot(density(usa1[usa1$PT==1,]$income1, na.rm=T), xlab="", ylim=c(0,2.5), xlim=c(-0.2, 1.2), main="", axes=F, ylab="", lty=2)
legend("topright", legend=c("Treatment 0", "Treatment PT"), lty=c(1,2), cex=1, bty="n")
xticks <- seq(0, 1, 0.5)
axis(1, at=xticks)

#Race
plot(density(usa1[usa1$PT==0,]$White, na.rm=T), xlab="Race (White = 1)", ylim=c(0,3.5), xlim=c(-0.2, 1.2), xaxt='n', main="Race", lty=1, cex=1.2)
par(new=TRUE)
plot(density(usa1[usa1$PT==1,]$White, na.rm=T), xlab="", ylim=c(0,3.5), xlim=c(-0.2, 1.2), main="", axes=F, ylab="", lty=2)
legend("topleft", legend=c("Treatment 0", "Treatment PT"), lty=c(1,2), cex=1, bty="n")
xticks <- seq(0, 1, 1)
axis(1, at=xticks)

dev.off()

##### Exploiting natural variation in perspective-taking

### Table 5: The dispositional perspective-taking results offer little support for the palliative effects of perspective-taking hypothesis

#US sample
mod1ch0 <- lm(policyChoice ~ iri1 + PT, data=subset(usa1, usa1$ch_incr==0))
mod2ch0 <- lm(policyChoice ~ iri1 + PT + milAssert1 +natAboutYou +  educ + male + ideo1, data=subset(usa1, usa1$ch_incr==0))
mod3ch0 <- lm(policyChoice ~ iri1+ PT*iri1 + milAssert1 +natAboutYou  + educ + male + ideo1, data=subset(usa1, usa1$ch_incr==0))
mod1ch1 <- lm(policyChoice ~ iri1 + PT, data=subset(usa1, usa1$ch_incr==1))
mod2ch1 <- lm(policyChoice ~ iri1 + PT + milAssert1 +natAboutYou  + educ+ male + ideo1, data=subset(usa1, usa1$ch_incr==1))
mod3ch1 <- lm(policyChoice ~ iri1 + PT*iri1 + milAssert1 +natAboutYou  + educ + male + ideo1, data=subset(usa1, usa1$ch_incr==1))
stargazer(mod1ch0, mod2ch0, mod3ch0, mod1ch1, mod2ch1, mod3ch1, title="The dispositional perspective-taking results offer little support for the palliative effects of perspective-taking hypothesis", omit.stat=c("LL", "ser", "f"), style="apsr", digits=3, label="tab:a5", covariate.labels=c("Dispositional empathy", "Perspective taking", "Military assertiveness", "National chauvinism",  "Education", "Male", "Ideology", "Dispositional empathy x PT", "Constant"), dep.var.labels.include=FALSE) 

#Chinese sample
mod1am0 <- lm(policyChoice ~ iri1 + PT, data=subset(china1,am_incr==0))
mod2am0 <- lm(policyChoice ~ iri1 + PT +  milAssert1 + natAboutYou  + educ +  male + partyMember, data=subset(china1,am_incr==0))
mod3am0 <- lm(policyChoice ~ iri1 + PT*iri1 + milAssert1 + natAboutYou  + educ + male + partyMember, data=subset(china1,am_incr==0))
mod1am1 <- lm(policyChoice ~ iri1 + PT, data=subset(china1,am_incr==1))
mod2am1 <- lm(policyChoice ~ iri1 + PT + milAssert1  + natAboutYou+ educ + male + partyMember, data=subset(china1,am_incr==1))
mod3am1 <- lm(policyChoice ~ iri1 + PT*iri1 + milAssert1  + natAboutYou + educ  + male + partyMember, data=subset(china1,am_incr==1))
stargazer(mod1am0, mod2am0, mod3am0, mod1am1, mod2am1, mod3am1, omit.stat=c("LL", "ser", "f"), style="apsr", digits=3, label="tab:a5",covariate.labels=c("Dispositional Empathy", "Perspective-taking", "Military assertiveness", "National chauvinism", "Education", "Male", "Party Member", "Dispositional empathy x PT", "Constant"), dep.var.labels.include=FALSE) 


##### STM results in the escalation conditions

#First, structural topic model results: United States (China escalates condition)
us.dat <- usa1[c("policyChoice", "ch_incr", "PT", "policyExpl")]
meta.data <- usa1[c("policyChoice", "ch_incr", "PT")]

docs1 <- usa1[c("policyExpl")]
Corpus <- Corpus(VectorSource(usa1$policyExpl))

#Clean corpus
Corpus.clean <- tm_map(Corpus, stripWhitespace)
Corpus.clean <- tm_map(Corpus.clean, removePunctuation)
Corpus.clean <- tm_map(Corpus.clean, tolower)
dataframe<-data.frame(text=unlist(sapply(Corpus.clean, `[`)), stringsAsFactors=F)

clean.data <- cbind(meta.data, dataframe)

## Remove offending characters that cause errors
for(i in 1:nrow(clean.data)){
  clean.data$text[i] <- gsub("[^[:alnum:]///' ]", "", clean.data$text[i])
}

incr.dat <- subset(clean.data, clean.data$ch_incr==1)
processed <- textProcessor(incr.dat$text, metadata=incr.dat)

mod.expl.pt1 <- selectModel(processed$documents, processed$vocab, K=7, prevalence = ~ PT, max.em.its=300, runs=20, data=processed$meta, init.type="Spectral", seed=43215) 

o.expl.pt1 <- mod.expl.pt1$runout[[1]]

expl.pt1.effect <- estimateEffect(1:7 ~ PT, o.expl.pt1, meta=processed$meta, uncertainty="Global")
plot(expl.pt1.effect, covariate="PT", topics=c(1:7), method="difference", cov.value1=1, cov.value2=0, xlab="Change in topical prevalence from PT to control") 
#No treatments significant

### Now, structural topic model results: China (US escalates condition)

Sys.setlocale(category = "LC_CTYPE", locale = "zh_CN.utf-8")
#Produce slimmer dataframe
cn.dat <- china1[c("policyChoice", "am_incr", "PT", "policyExpl")]

#Preprocess using jieba
require(jiebaR)
cn.dat$seg_policyExpl <- segment(cn.dat$policyExpl, worker(bylines = TRUE))
cn.dat$seg_policyExpl <- gsub("^c\\(|\\)$", "", cn.dat$seg_policyExpl) # clear 'c(' in the beginning and ')' in the end of the string
cn.dat$seg_policyExpl <- gsub(", ", " ", cn.dat$seg_policyExpl) # remove ,
cn.dat$seg_policyExpl <- gsub('"', '', cn.dat$seg_policyExpl) # remove "

#Address missingness and observations that use English letters but Chinese text

#Drop two missing observations 
cn.dat <- cn.dat[!is.na(cn.dat$am_incr),]

#Drop observations that use random English letters/phrases
j <- c(76,104,153,219,317,319,449,549,605,651,703,758,781,833,865,962,1181,1190,1208,1212)

cn.dat$seg_policyExpl[j] <- NA

expl1.CN <- textProcessor(cn.dat$seg_policyExpl[which(cn.dat$am_incr==1)], metadata=cn.dat[which(cn.dat$am_incr==1),], lowercase=FALSE, removestopwords = FALSE, removenumbers = TRUE, removepunctuation = TRUE, stem=FALSE, language="na") #Create corpus
expl.CN1p <- prepDocuments(expl1.CN$documents, expl1.CN$vocab, meta=expl1.CN$meta) #Finish corpus, removing low-frequency terms

mod.expl.pt1p <- selectModel(expl.CN1p$documents, expl.CN1p$vocab, K=7, prevalence = ~ PT, max.em.its=300, runs=20, data=expl.CN1p$meta, init.type="Spectral", seed=43215) 

o.expl.pt1p <- mod.expl.pt1p$runout[[1]]

## Figure 4a: Structural topic model results: China (United States escalates condition)

expl.pt1p.effect <- estimateEffect(1:7 ~ PT, o.expl.pt1p, meta=expl.CN1p$meta, uncertainty="Global")
plot(expl.pt1p.effect, covariate="PT", topics=c(6), method="difference", cov.value1=1, cov.value2=0, xlab="Change in topical prevalence from PT to control", xlim=c(-0.1,0.3)) 

## Quotes for Figure 4b: Intrinsic interests

o.expl.pt1p.thoughts6 <- findThoughts(o.expl.pt1p, texts=expl.CN1p$meta$policyExpl, topics=6, n=10)
o.expl.pt1p.thoughts6

###### Rumination effects

## US sample
summary(lm(policyChoice ~ lscreen3_t, data=subset(usa1, usa1$PT==0 & usa1$ch_incr==0))) #B = -0.07, p < 0.35
summary(lm(policyChoice ~ lscreen3_t + male + age + White + income1 + educ1 + educ2 + educ3, data=subset(usa1, usa1$PT==0 & usa1$ch_incr==0))) #B = -0.01, p < 0.91

### Chinese sample
china1$lscreen3_t[which(china1$lscreen3_t <2)] <- NA #drop the -Inf

summary(lm(policyChoice ~ lscreen3_t, data=subset(china1, china1$PT==0 & china1$am_incr==0))) #B = -0.31, p < 0.01
summary(lm(policyChoice ~ lscreen3_t + male + age + Han + educ, data=subset(china1, china1$PT==0 & china1$am_incr==0))) #B= -0.29, p < 0.02

summary(lm(policyChoice ~ lscreen3_t, data=subset(china1, china1$PT==0 & china1$am_incr==1))) #B = -0.15, p < 0.09
summary(lm(policyChoice ~ lscreen3_t + male + age + Han + educ, data=subset(china1, china1$PT==0 & china1$am_incr==1))) #B = -0.15, p < 0.085

####### Follow-up attribution asymmetry experiment: Hawaii

usa2$attrBias <- usa2$attrChInc - usa2$attrUsInc #Attribution asymmetry

#Compare American perceptions of Chinese escalations in Hawaii vs South China Sea
combined.df2 <- data.frame(asymmetry=c(usa1$attrChInc, usa2$attrChInc), hawaii=c(rep(0,nrow(usa1)), rep(1,nrow(usa2))))
t.test(asymmetry ~ hawaii, data=combined.df2) #p < 0.001

#Chinese attribution asymmetry vs American asymmetry in Study 1
CH1.asym <- mean(china1$attrBias, na.rm=TRUE)
US1.asym <- mean(usa1$attrBias, na.rm=TRUE) 
round(CH1.asym/US1.asym, digits=1) # Attribution asymmetry 3.1 times greater in China than in the US

#Chinese attribution asymmetry vs American asymmetry in Study 2
round(CH1.asym/mean(usa2$attrBias, na.rm=TRUE), digits=1) #2.2 times larger in China than in US sample in hawaii experiment

#Compare American perceptions of American escalations in Hawaii vs South China Sea
combined.df2 <- data.frame(asymmetry=c(usa1$attrUsInc, usa2$attrUsInc), hawaii=c(rep(0,nrow(usa1)), rep(1,nrow(usa2))))
t.test(asymmetry ~ hawaii, data=combined.df2) #p < 0.2

#Compare American attribution asymmetries in Hawaii vs South China Sea
combined.df2 <- data.frame(asymmetry=c(usa1$attrBias, usa2$attrBias), hawaii=c(rep(0,nrow(usa1)), rep(1,nrow(usa2))))
t.test(asymmetry ~ hawaii, data=combined.df2) #t=-2.89; p < 0.004

